library

library(dplyr)
## 
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(keras)
library(tensorflow)
library(corrplot)
## corrplot 0.92 loaded
library(plotly)
## 필요한 패키지를 로딩중입니다: ggplot2
## 
## 다음의 패키지를 부착합니다: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

tensorflow cpu 사용

# Hide every GPU from TensorFlow by passing an empty device list for the
# "GPU" device type.
tf$config$set_visible_devices(list(), "GPU")
# Print the physical devices TensorFlow detects.
# NOTE(review): the recorded output still lists the GPU after the call above,
# so this listing does not confirm the visibility change; presumably
# tf$config$get_visible_devices() is the call that would - TODO confirm.
tf$config$list_physical_devices()
## [[1]]
## PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')
## 
## [[2]]
## PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')

데이터 불러오기

# Read the training and validation tables from CSV files located in the
# current working directory.
hardness_data_set_tr <- read.csv(file = "Hardness data set_tr.csv")
hardness_data_set_val <- read.csv(file = "Hardness data set_val.csv")

데이터 확인

# Show the column types and first values of the training table.
str(hardness_data_set_tr)
## 'data.frame':    800 obs. of  28 variables:
##  $ Number   : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ X        : int  -35 -35 -35 -35 -35 -35 -25 -25 -25 -25 ...
##  $ Y        : int  -25 25 15 -15 5 -5 35 -35 25 -25 ...
##  $ Al       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Ti       : num  26.9 46.8 42.9 30.1 37.8 ...
##  $ Cr       : num  13.3 12.1 13 13.6 14.2 ...
##  $ Fe       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Co       : num  59.8 41.1 44.1 56.2 48 ...
##  $ Ni       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Cu       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Zr       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Mo       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ W        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Mn       : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Si       : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Mg       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Re       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Ta       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Thickness: num  567 504 517 552 538 ...
##  $ Hardness : num  11.5 13 13.8 11.7 14.6 ...
##  $ Modulus  : num  197 209 224 232 192 ...
##  $ ravg     : num  0.137 0.145 0.143 0.138 0.141 ...
##  $ delta    : num  0.14 0.148 0.148 0.143 0.148 ...
##  $ dHmix    : num  -20.3 -23.9 -23.7 -21.4 -22.9 ...
##  $ ENavg    : num  1.78 1.73 1.74 1.77 1.75 ...
##  $ dEN      : num  0.122 0.125 0.125 0.123 0.125 ...
##  $ N        : int  3 3 3 3 3 3 3 3 3 3 ...
##  $ Compo    : chr  "Co/Cr/Ti" "Co/Cr/Ti" "Co/Cr/Ti" "Co/Cr/Ti" ...
# Show the column types and first values of the validation table.
str(hardness_data_set_val)
## 'data.frame':    200 obs. of  28 variables:
##  $ Number   : int  801 802 803 804 805 806 807 808 809 810 ...
##  $ X        : int  -5 -5 5 5 5 5 15 15 15 15 ...
##  $ Y        : int  15 35 -35 -15 5 25 -25 -5 15 35 ...
##  $ Al       : num  55.2 59.3 40.2 44.9 48.6 ...
##  $ Ti       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Cr       : num  21.8 16.8 41.3 33.7 26.4 ...
##  $ Fe       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Co       : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Ni       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Cu       : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Zr       : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Mo       : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ W        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Mn       : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Si       : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Mg       : num  23 24 18.4 21.4 25 ...
##  $ Re       : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Ta       : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Thickness: num  498 513 414 454 480 ...
##  $ Hardness : num  7.15 7.14 7.03 6.63 7.08 ...
##  $ Modulus  : num  176 157 177 159 166 ...
##  $ ravg     : num  0.475 0.489 0.404 0.449 0.502 ...
##  $ delta    : num  0.699 0.706 0.658 0.687 0.717 ...
##  $ dHmix    : num  -1.0071 -1.2507 0.0672 0.102 0.2297 ...
##  $ ENavg    : num  1.55 1.55 1.58 1.56 1.55 ...
##  $ dEN      : num  0.134 0.134 0.128 0.134 0.139 ...
##  $ N        : int  3 3 3 3 3 3 3 3 3 3 ...
##  $ Compo    : chr  "Mg/Al/Cr" "Mg/Al/Cr" "Mg/Al/Cr" "Mg/Al/Cr" ...

결측치 확인

# Count missing (TRUE) versus present (FALSE) cells in each table.
table(is.na(hardness_data_set_tr))
table(is.na(hardness_data_set_val))
## 
## FALSE 
## 22400
## 
## FALSE  TRUE 
##  2927  2673

결측치 위치

제거

# Drop every validation row that contains at least one missing value.
# complete.cases() locates those rows from the data itself, so the filter
# keeps working if the CSV is regenerated and the incomplete rows move; a
# hard-coded index range such as 101:199 only fits one particular file.
val_rows_complete <- complete.cases(hardness_data_set_val)
hardness_data_set_val <- hardness_data_set_val[val_rows_complete, ]

summary

# Per-column summary statistics of the training table.
summary(hardness_data_set_tr)
##      Number            X                  Y                Al        
##  Min.   :  1.0   Min.   :-45.0000   Min.   :-43.00   Min.   :  0.00  
##  1st Qu.:200.8   1st Qu.:-15.0000   1st Qu.: -5.00   1st Qu.:  0.00  
##  Median :400.5   Median : -2.0000   Median :  0.00   Median :  0.00  
##  Mean   :400.5   Mean   : -0.6587   Mean   :  0.31   Mean   : 13.09  
##  3rd Qu.:600.2   3rd Qu.: 15.0000   3rd Qu.:  5.00   3rd Qu.: 15.45  
##  Max.   :800.0   Max.   : 35.0000   Max.   : 43.00   Max.   :100.00  
##        Ti               Cr               Fe               Co        
##  Min.   : 0.000   Min.   :  0.00   Min.   :  0.00   Min.   : 0.000  
##  1st Qu.: 0.000   1st Qu.:  0.00   1st Qu.:  0.00   1st Qu.: 0.000  
##  Median : 0.000   Median :  0.00   Median :  0.00   Median : 0.000  
##  Mean   : 6.953   Mean   : 19.48   Mean   : 19.74   Mean   : 4.323  
##  3rd Qu.: 0.000   3rd Qu.: 27.12   3rd Qu.: 32.27   3rd Qu.: 0.000  
##  Max.   :60.578   Max.   :100.00   Max.   :100.00   Max.   :67.523  
##        Ni               Cu                Zr               Mo        
##  Min.   :  0.00   Min.   :  0.000   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.:  0.00   1st Qu.:  0.000   1st Qu.: 0.000   1st Qu.: 0.000  
##  Median :  0.00   Median :  0.000   Median : 0.000   Median : 0.000  
##  Mean   : 17.76   Mean   :  6.033   Mean   : 7.189   Mean   : 1.335  
##  3rd Qu.: 23.90   3rd Qu.:  0.000   3rd Qu.: 0.000   3rd Qu.: 0.000  
##  Max.   :100.00   Max.   :100.000   Max.   :83.826   Max.   :34.647  
##        W                 Mn          Si          Mg              Re         
##  Min.   :  0.000   Min.   :0   Min.   :0   Min.   : 0.00   Min.   : 0.0000  
##  1st Qu.:  0.000   1st Qu.:0   1st Qu.:0   1st Qu.: 0.00   1st Qu.: 0.0000  
##  Median :  0.000   Median :0   Median :0   Median : 0.00   Median : 0.0000  
##  Mean   :  2.954   Mean   :0   Mean   :0   Mean   : 0.74   Mean   : 0.2225  
##  3rd Qu.:  0.000   3rd Qu.:0   3rd Qu.:0   3rd Qu.: 0.00   3rd Qu.: 0.0000  
##  Max.   :100.000   Max.   :0   Max.   :0   Max.   :23.52   Max.   :13.6640  
##        Ta           Thickness         Hardness         Modulus      
##  Min.   : 0.000   Min.   :  91.4   Min.   : 2.297   Min.   : 95.37  
##  1st Qu.: 0.000   1st Qu.: 393.5   1st Qu.: 8.614   1st Qu.:164.02  
##  Median : 0.000   Median : 493.7   Median : 9.764   Median :185.49  
##  Mean   : 0.184   Mean   : 526.9   Mean   : 9.836   Mean   :182.72  
##  3rd Qu.: 0.000   3rd Qu.: 604.1   3rd Qu.:11.611   3rd Qu.:203.50  
##  Max.   :12.845   Max.   :1252.0   Max.   :19.368   Max.   :300.05  
##       ravg            delta              dHmix            ENavg      
##  Min.   :0.1240   Min.   :0.000000   Min.   :-43.91   Min.   :1.415  
##  1st Qu.:0.1247   1st Qu.:0.000000   1st Qu.:-19.83   1st Qu.:1.627  
##  Median :0.1280   Median :0.003976   Median : -3.39   Median :1.784  
##  Mean   :0.1445   Mean   :0.067295   Mean   :-10.47   Mean   :1.754  
##  3rd Qu.:0.1433   3rd Qu.:0.109187   3rd Qu.:  0.00   3rd Qu.:1.830  
##  Max.   :0.4906   Max.   :0.690492   Max.   :  0.00   Max.   :2.360  
##       dEN                N            Compo          
##  Min.   :0.00000   Min.   :1.000   Length:800        
##  1st Qu.:0.00000   1st Qu.:1.000   Class :character  
##  Median :0.09058   Median :3.000   Mode  :character  
##  Mean   :0.08502   Mean   :2.438                     
##  3rd Qu.:0.12439   3rd Qu.:3.000                     
##  Max.   :0.29124   Max.   :4.000
# Per-column summary statistics of the validation table.
summary(hardness_data_set_val)
##      Number           X                Y                  Al       
##  Min.   : 801   Min.   :-35.00   Min.   :-43.0000   Min.   : 0.00  
##  1st Qu.: 826   1st Qu.:-15.00   1st Qu.:-25.0000   1st Qu.: 0.00  
##  Median : 851   Median :  5.00   Median :  5.0000   Median :40.24  
##  Mean   : 852   Mean   :  2.03   Mean   : -0.2277   Mean   :30.58  
##  3rd Qu.: 876   3rd Qu.: 15.00   3rd Qu.: 15.0000   3rd Qu.:47.65  
##  Max.   :1000   Max.   : 35.00   Max.   : 35.0000   Max.   :65.32  
##        Ti              Cr              Fe               Co          Ni        
##  Min.   : 0.00   Min.   : 0.00   Min.   : 0.000   Min.   :0   Min.   : 0.000  
##  1st Qu.: 0.00   1st Qu.: 0.00   1st Qu.: 0.000   1st Qu.:0   1st Qu.: 0.000  
##  Median :18.41   Median : 0.00   Median : 0.000   Median :0   Median : 0.000  
##  Mean   :17.86   Mean   :16.68   Mean   : 9.667   Mean   :0   Mean   : 0.227  
##  3rd Qu.:35.07   3rd Qu.:32.35   3rd Qu.:23.582   3rd Qu.:0   3rd Qu.: 0.000  
##  Max.   :54.87   Max.   :54.71   Max.   :42.238   Max.   :0   Max.   :22.925  
##        Cu          Zr          Mo          W           Mn          Si   
##  Min.   :0   Min.   :0   Min.   :0   Min.   :0   Min.   :0   Min.   :0  
##  1st Qu.:0   1st Qu.:0   1st Qu.:0   1st Qu.:0   1st Qu.:0   1st Qu.:0  
##  Median :0   Median :0   Median :0   Median :0   Median :0   Median :0  
##  Mean   :0   Mean   :0   Mean   :0   Mean   :0   Mean   :0   Mean   :0  
##  3rd Qu.:0   3rd Qu.:0   3rd Qu.:0   3rd Qu.:0   3rd Qu.:0   3rd Qu.:0  
##  Max.   :0   Max.   :0   Max.   :0   Max.   :0   Max.   :0   Max.   :0  
##        Mg              Re          Ta      Thickness        Hardness     
##  Min.   : 0.00   Min.   :0   Min.   :0   Min.   :279.9   Min.   : 4.410  
##  1st Qu.:20.43   1st Qu.:0   1st Qu.:0   1st Qu.:418.7   1st Qu.: 6.860  
##  Median :25.00   Median :0   Median :0   Median :452.7   Median : 7.293  
##  Mean   :24.99   Mean   :0   Mean   :0   Mean   :450.3   Mean   : 7.252  
##  3rd Qu.:30.26   3rd Qu.:0   3rd Qu.:0   3rd Qu.:483.2   3rd Qu.: 7.723  
##  Max.   :39.29   Max.   :0   Max.   :0   Max.   :574.8   Max.   :11.739  
##     Modulus           ravg            delta            dHmix         
##  Min.   :122.0   Min.   :0.1474   Min.   :0.1458   Min.   :-21.9551  
##  1st Qu.:148.9   1st Qu.:0.4381   1st Qu.:0.6686   1st Qu.: -4.3164  
##  Median :159.9   Median :0.5064   Median :0.7128   Median : -0.4221  
##  Mean   :159.3   Mean   :0.5066   Mean   :0.6977   Mean   :  1.0934  
##  3rd Qu.:171.4   3rd Qu.:0.5837   3rd Qu.:0.7441   3rd Qu.: 10.5022  
##  Max.   :186.7   Max.   :0.7188   Max.   :0.7874   Max.   : 16.3099  
##      ENavg            dEN               N        Compo          
##  Min.   :1.509   Min.   :0.1065   Min.   :3   Length:101        
##  1st Qu.:1.539   1st Qu.:0.1391   1st Qu.:3   Class :character  
##  Median :1.558   Median :0.1488   Median :3   Mode  :character  
##  Mean   :1.567   Mean   :0.1541   Mean   :3                     
##  3rd Qu.:1.581   3rd Qu.:0.1700   3rd Qu.:3                     
##  Max.   :1.696   Max.   :0.2028   Max.   :3

상관계수 그래프

# Pearson correlation matrix over columns 2:26 of the training table, leaving
# out columns 14 and 15 (Mn and Si, which are 0 in every training row).
cor_input <- hardness_data_set_tr[, setdiff(2:26, 14:15)]
cor_data <- cor(cor_input, method = "pearson")
# Draw the matrix with the coefficients printed as numbers.
corrplot(cor_data, method = "number")

box_그래프

# Labels for the box plots: the names of columns 2:26 of the training table.
names_list <- colnames(hardness_data_set_tr)[2:26]

훈련데이터의 box그래프

# Training-set box plots: start with column 2, then append one box trace for
# each of columns 3:26, labelled with the matching entry of names_list.
tr_fig <- plot_ly(
  y = hardness_data_set_tr[, 2],
  type = "box",
  quartilemethod = "linear",
  name = names_list[1]
)
for (k in 2:25) {
  # names_list[k] labels data-frame column k + 1 (names_list starts at col 2).
  tr_fig <- add_trace(
    tr_fig,
    y = hardness_data_set_tr[, k + 1],
    type = "box",
    quartilemethod = "linear",
    name = names_list[k]
  )
}
tr_fig

검증데이터의 box그래프

# Validation-set box plots: start with column 2, then append one box trace
# for each of columns 3:26, labelled with the matching entry of names_list.
val_fig <- plot_ly(
  y = hardness_data_set_val[, 2],
  type = "box",
  quartilemethod = "linear",
  name = names_list[1]
)
for (k in 2:25) {
  # names_list[k] labels data-frame column k + 1 (names_list starts at col 2).
  val_fig <- add_trace(
    val_fig,
    y = hardness_data_set_val[, k + 1],
    type = "box",
    quartilemethod = "linear",
    name = names_list[k]
  )
}
val_fig
# Keep columns 2:26 (X ... dEN) of both tables for modelling.
sel_hardness_data_set_tr <- hardness_data_set_tr[, 2:26]
sel_hardness_data_set_val <- hardness_data_set_val[, 2:26]

# X / Y position columns: shift by 45, then min-max scale with the fixed
# bounds min_pos = 0 and max_pos = 90.
max_pos <- 45 * 2
min_pos <- 0
for (pos_col in c("X", "Y")) {
  shifted_tr <- sel_hardness_data_set_tr[[pos_col]] + 45
  sel_hardness_data_set_tr[[pos_col]] <-
    (shifted_tr - min_pos) / (max_pos - min_pos)

  shifted_val <- sel_hardness_data_set_val[[pos_col]] + 45
  sel_hardness_data_set_val[[pos_col]] <-
    (shifted_val - min_pos) / (max_pos - min_pos)
}

# Columns 3:17 (Al ... Ta) are multiplied by 0.01.
sel_hardness_data_set_tr[, 3:17] <- sel_hardness_data_set_tr[, 3:17] * 0.01
sel_hardness_data_set_val[, 3:17] <- sel_hardness_data_set_val[, 3:17] * 0.01

# Columns 18 onward: min-max scale both tables with the TRAINING minimum and
# maximum, so validation values may fall outside [0, 1].
for (col_idx in 18:ncol(sel_hardness_data_set_tr)) {
  tr_range <- range(sel_hardness_data_set_tr[, col_idx])
  tr_span <- tr_range[2] - tr_range[1]

  sel_hardness_data_set_tr[, col_idx] <-
    (sel_hardness_data_set_tr[, col_idx] - tr_range[1]) / tr_span
  sel_hardness_data_set_val[, col_idx] <-
    (sel_hardness_data_set_val[, col_idx] - tr_range[1]) / tr_span
}

summary(sel_hardness_data_set_tr)
summary(sel_hardness_data_set_val)
##        X                Y                 Al               Ti         
##  Min.   :0.0000   Min.   :0.02222   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.3333   1st Qu.:0.44444   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.4778   Median :0.50000   Median :0.0000   Median :0.00000  
##  Mean   :0.4927   Mean   :0.50344   Mean   :0.1309   Mean   :0.06953  
##  3rd Qu.:0.6667   3rd Qu.:0.55556   3rd Qu.:0.1545   3rd Qu.:0.00000  
##  Max.   :0.8889   Max.   :0.97778   Max.   :1.0000   Max.   :0.60578  
##        Cr               Fe               Co                Ni        
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.00000   Median :0.0000  
##  Mean   :0.1948   Mean   :0.1974   Mean   :0.04323   Mean   :0.1776  
##  3rd Qu.:0.2712   3rd Qu.:0.3227   3rd Qu.:0.00000   3rd Qu.:0.2390  
##  Max.   :1.0000   Max.   :1.0000   Max.   :0.67523   Max.   :1.0000  
##        Cu                Zr                Mo                W          
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.06033   Mean   :0.07189   Mean   :0.01335   Mean   :0.02954  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :0.83826   Max.   :0.34647   Max.   :1.00000  
##        Mn          Si          Mg               Re                 Ta         
##  Min.   :0   Min.   :0   Min.   :0.0000   Min.   :0.000000   Min.   :0.00000  
##  1st Qu.:0   1st Qu.:0   1st Qu.:0.0000   1st Qu.:0.000000   1st Qu.:0.00000  
##  Median :0   Median :0   Median :0.0000   Median :0.000000   Median :0.00000  
##  Mean   :0   Mean   :0   Mean   :0.0074   Mean   :0.002225   Mean   :0.00184  
##  3rd Qu.:0   3rd Qu.:0   3rd Qu.:0.0000   3rd Qu.:0.000000   3rd Qu.:0.00000  
##  Max.   :0   Max.   :0   Max.   :0.2352   Max.   :0.136640   Max.   :0.12845  
##    Thickness         Hardness         Modulus            ravg         
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.000000  
##  1st Qu.:0.2603   1st Qu.:0.3700   1st Qu.:0.3354   1st Qu.:0.001848  
##  Median :0.3466   Median :0.4374   Median :0.4403   Median :0.010911  
##  Mean   :0.3752   Mean   :0.4416   Mean   :0.4267   Mean   :0.055824  
##  3rd Qu.:0.4418   3rd Qu.:0.5456   3rd Qu.:0.5283   3rd Qu.:0.052628  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.000000  
##      delta              dHmix            ENavg             dEN        
##  Min.   :0.000000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.000000   1st Qu.:0.5484   1st Qu.:0.2244   1st Qu.:0.0000  
##  Median :0.005759   Median :0.9228   Median :0.3909   Median :0.3110  
##  Mean   :0.097460   Mean   :0.7615   Mean   :0.3587   Mean   :0.2919  
##  3rd Qu.:0.158130   3rd Qu.:1.0000   3rd Qu.:0.4392   3rd Qu.:0.4271  
##  Max.   :1.000000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000
##        X                Y                 Al               Ti        
##  Min.   :0.1111   Min.   :0.02222   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.3333   1st Qu.:0.22222   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.5556   Median :0.55556   Median :0.4024   Median :0.1841  
##  Mean   :0.5226   Mean   :0.49747   Mean   :0.3058   Mean   :0.1786  
##  3rd Qu.:0.6667   3rd Qu.:0.66667   3rd Qu.:0.4765   3rd Qu.:0.3507  
##  Max.   :0.8889   Max.   :0.88889   Max.   :0.6532   Max.   :0.5487  
##        Cr               Fe                Co          Ni                Cu   
##  Min.   :0.0000   Min.   :0.00000   Min.   :0   Min.   :0.00000   Min.   :0  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0   1st Qu.:0.00000   1st Qu.:0  
##  Median :0.0000   Median :0.00000   Median :0   Median :0.00000   Median :0  
##  Mean   :0.1668   Mean   :0.09667   Mean   :0   Mean   :0.00227   Mean   :0  
##  3rd Qu.:0.3235   3rd Qu.:0.23582   3rd Qu.:0   3rd Qu.:0.00000   3rd Qu.:0  
##  Max.   :0.5471   Max.   :0.42238   Max.   :0   Max.   :0.22925   Max.   :0  
##        Zr          Mo          W           Mn          Si          Mg        
##  Min.   :0   Min.   :0   Min.   :0   Min.   :0   Min.   :0   Min.   :0.0000  
##  1st Qu.:0   1st Qu.:0   1st Qu.:0   1st Qu.:0   1st Qu.:0   1st Qu.:0.2043  
##  Median :0   Median :0   Median :0   Median :0   Median :0   Median :0.2500  
##  Mean   :0   Mean   :0   Mean   :0   Mean   :0   Mean   :0   Mean   :0.2499  
##  3rd Qu.:0   3rd Qu.:0   3rd Qu.:0   3rd Qu.:0   3rd Qu.:0   3rd Qu.:0.3026  
##  Max.   :0   Max.   :0   Max.   :0   Max.   :0   Max.   :0   Max.   :0.3929  
##        Re          Ta      Thickness         Hardness         Modulus      
##  Min.   :0   Min.   :0   Min.   :0.1625   Min.   :0.1238   Min.   :0.1299  
##  1st Qu.:0   1st Qu.:0   1st Qu.:0.2820   1st Qu.:0.2673   1st Qu.:0.2614  
##  Median :0   Median :0   Median :0.3113   Median :0.2927   Median :0.3154  
##  Mean   :0   Mean   :0   Mean   :0.3093   Mean   :0.2903   Mean   :0.3124  
##  3rd Qu.:0   3rd Qu.:0   3rd Qu.:0.3376   3rd Qu.:0.3178   3rd Qu.:0.3712  
##  Max.   :0   Max.   :0   Max.   :0.4165   Max.   :0.5531   Max.   :0.4461  
##       ravg             delta            dHmix            ENavg        
##  Min.   :0.06376   Min.   :0.2111   Min.   :0.5000   Min.   :0.09975  
##  1st Qu.:0.85670   1st Qu.:0.9683   1st Qu.:0.9017   1st Qu.:0.13110  
##  Median :1.04304   Median :1.0323   Median :0.9904   Median :0.15128  
##  Mean   :1.04373   Mean   :1.0105   Mean   :1.0249   Mean   :0.16105  
##  3rd Qu.:1.25412   3rd Qu.:1.0776   3rd Qu.:1.2392   3rd Qu.:0.17554  
##  Max.   :1.62254   Max.   :1.1404   Max.   :1.3714   Max.   :0.29795  
##       dEN        
##  Min.   :0.3657  
##  1st Qu.:0.4775  
##  Median :0.5109  
##  Mean   :0.5293  
##  3rd Qu.:0.5838  
##  Max.   :0.6964

결측치 확인

# Count missing versus present cells in the normalized tables.
table(is.na(sel_hardness_data_set_tr))
table(is.na(sel_hardness_data_set_val))
## 
## FALSE 
## 20000
## 
## FALSE 
##  2525

정규화된 box그래프

# Box plots of the normalized columns, one figure per table. Each figure
# starts with column 1 and gains one box trace per remaining column.
sel_name_list <- names(sel_hardness_data_set_tr)

nor_fig_tr <- plot_ly(
  y = sel_hardness_data_set_tr[, 1],
  type = "box",
  quartilemethod = "linear",
  name = sel_name_list[1]
)
nor_fig_val <- plot_ly(
  y = sel_hardness_data_set_val[, 1],
  type = "box",
  quartilemethod = "linear",
  name = sel_name_list[1]
)

for (col_idx in 2:ncol(sel_hardness_data_set_tr)) {
  nor_fig_tr <- add_trace(
    nor_fig_tr,
    y = sel_hardness_data_set_tr[, col_idx],
    type = "box",
    quartilemethod = "linear",
    name = sel_name_list[col_idx]
  )
  nor_fig_val <- add_trace(
    nor_fig_val,
    y = sel_hardness_data_set_val[, col_idx],
    type = "box",
    quartilemethod = "linear",
    name = sel_name_list[col_idx]
  )
}

nor_fig_tr
nor_fig_val

데이터셋을 행렬로 변환

# Split each normalized table into a feature matrix (every column except
# Hardness, which is column 19) and a one-column target matrix (Hardness).
feature_cols <- setdiff(names(sel_hardness_data_set_tr), "Hardness")

x_train <- as.matrix(sel_hardness_data_set_tr[, feature_cols])
y_train <- matrix(sel_hardness_data_set_tr[["Hardness"]], ncol = 1)

x_val <- as.matrix(sel_hardness_data_set_val[, feature_cols])
y_val <- matrix(sel_hardness_data_set_val[["Hardness"]], ncol = 1)

랜덤값 고정

# Use the same seed for R's random number generator and for TensorFlow.
seed_value <- 7
set.seed(seed_value)
tf$random$set_seed(seed_value)

기본 파라미터

# Activation used by the hidden dense layers.
act <- "selu"
# Batch size passed to predict().
batch.size <- 32

모델생성

# Reset the Keras session before building the model.
k_clear_session()

# Every layer width below is derived from the number of input features.
n_features <- ncol(x_train)

input <- layer_input(shape = n_features, name = "input")

# Attention branch: a dense layer as wide as the input, followed by softmax.
att_scores <- layer_dense(input, units = n_features, name = "att_score")
att_layer <- layer_activation_softmax(att_scores)

# Element-wise product of the raw inputs and the attention weights.
mul_layer <- layer_multiply(inputs = list(input, att_layer))

# Three dense layers of width 8x, 4x and 1x the feature count.
hidden_1 <- layer_dense(
  mul_layer,
  units = n_features * 8, activation = act, name = "hidden_1"
)
hidden_2 <- layer_dense(
  hidden_1,
  units = n_features * 4, activation = act, name = "hidden_2"
)
hidden <- layer_dense(
  hidden_2,
  units = n_features, activation = act, name = "hidden_3"
)

# The same attention weights are multiplied into the last hidden layer.
output_att <- layer_multiply(inputs = list(hidden, att_layer))

# Single-unit dense output with no activation argument.
output <- layer_dense(output_att, units = 1, name = "output")

simple_att_model <- keras_model(
  inputs = input,
  outputs = output,
  name = "simple_att_model"
)
summary(simple_att_model)
## Model: "simple_att_model"
## ________________________________________________________________________________
##  Layer (type)             Output Shape      Param #  Connected to               
## ================================================================================
##  input (InputLayer)       [(None, 24)]      0        []                         
##  att_score (Dense)        (None, 24)        600      ['input[0][0]']            
##  softmax (Softmax)        (None, 24)        0        ['att_score[0][0]']        
##  multiply (Multiply)      (None, 24)        0        ['input[0][0]',            
##                                                       'softmax[0][0]']          
##  hidden_1 (Dense)         (None, 192)       4800     ['multiply[0][0]']         
##  hidden_2 (Dense)         (None, 96)        18528    ['hidden_1[0][0]']         
##  hidden_3 (Dense)         (None, 24)        2328     ['hidden_2[0][0]']         
##  multiply_1 (Multiply)    (None, 24)        0        ['hidden_3[0][0]',         
##                                                       'softmax[0][0]']          
##  output (Dense)           (None, 1)         25       ['multiply_1[0][0]']       
## ================================================================================
## Total params: 26,281
## Trainable params: 26,281
## Non-trainable params: 0
## ________________________________________________________________________________

모델 plot

# Call plot() on the model object.
# NOTE(review): presumably this renders the layer graph; confirm against the
# keras version in use.
plot(simple_att_model)

lr 스케줄러 함수 정의

# Learning-rate schedule: returns the incoming rate multiplied by 0.999.
#   epoch  epoch index; accepted but not used.
#   lr     current learning rate.
lr_schedule <- function(epoch, lr) {
  lr * 0.999
}
# Wrap lr_schedule in a Keras learning-rate-scheduler callback.
lr_scheduler <- callback_learning_rate_scheduler(schedule = lr_schedule)

early stopping 설정

# Early-stopping callback: monitors val_loss with a patience of 200 and asks
# Keras to restore the best weights.
early_stopping <- callback_early_stopping(
  restore_best_weights = TRUE,
  patience = 200L,
  monitor = "val_loss"
)

모델 훈련

loss 그래프 확인

# Plot training and validation loss against the epoch index.
# NOTE(review): this reads the history stored on the model object, which is
# presumably only populated after fit() has run; no fit() call is visible in
# this document - confirm the training step before running.
loss_history <- simple_att_model$history$history
# seq_along() yields an empty index when no epochs were recorded, whereas
# 1:length(x) would yield c(1, 0).
epoch_index <- seq_along(loss_history$loss)
# plotly's scatter mode for a line trace is "lines" ("line" is not a valid
# mode value).
plot_ly(
  x = epoch_index,
  y = loss_history$loss,
  type = "scatter",
  mode = "lines",
  name = "tr_loss"
) %>%
  add_trace(
    x = epoch_index,
    y = loss_history$val_loss,
    type = "scatter",
    mode = "lines",
    name = "val_loss"
  )

모델의 R2(결정계수)계산, SSE/SST

# Fitted values of the model on the training features.
att_model_fitted_value <- simple_att_model %>%
  predict(x_train, batch_size = batch.size)
## 25/25 - 0s - 94ms/epoch - 4ms/step
# Coefficient of determination: R^2 = 1 - SSE / SST, where
#   SST = total sum of squares of the targets around their mean,
#   SSE = sum of squared residuals (target minus fitted value).
# The earlier ratio sum((fitted - mean(y))^2) / SST (recorded value
# 0.8470726) equals R^2 only for a least-squares linear fit with an
# intercept, which this network is not; re-run to get the corrected value.
SST <- sum((y_train - mean(y_train))^2)
SSE <- sum((y_train - att_model_fitted_value)^2)
print(1 - SSE / SST)
## [1] 0.8470726

모델이 훈련시 어떤 변수에 가중치를 주었는지 확인

# Sub-model that maps the inputs to the softmax attention output, reusing the
# layers of the trained model.
att_score_out <- keras_model(inputs = input, outputs = att_layer)
att_score <- predict(att_score_out, x_val, batch_size = batch.size)
## 4/4 - 0s - 36ms/epoch - 9ms/step
# One column per input feature, i.e. every selected column except the 19th
# (Hardness).
df_att_score <- setNames(data.frame(att_score), sel_name_list[-19])

개별 데이터의 attention score 확인

# Pick four random validation rows and print their indices.
row_num <- sample(1:nrow(att_score), size = 4)
row_num
## [1] 42 83 31 92
# Fix the bar order to the feature order instead of alphabetical order.
feature_axis <- factor(sel_name_list[-19], levels = sel_name_list[-19])
# One bar chart of attention scores per sampled row.
att_sc_list <- lapply(row_num, function(row_idx) {
  plot_ly(x = feature_axis, y = att_score[row_idx, ], type = "bar")
})
# Arrange the four charts on a 2 x 2 grid.
subplot(att_sc_list, nrows = 2)

전체 데이터의 attention score 확인

# Scatter-plot every column of a table against its row index.
#
#   graph_data  anything data.frame() accepts; each column becomes one
#               marker trace named after the column.
#
# Returns the plotly figure. Stops with an error when graph_data has no
# columns.
View_graph <- function(graph_data) {
  graph_data <- data.frame(graph_data)
  n_cols <- ncol(graph_data)
  if (n_cols == 0) {
    stop("graph_data must have at least one column", call. = FALSE)
  }

  # seq_len() gives an empty index for a zero-row table (1:0 would not).
  x <- seq_len(nrow(graph_data))
  trace_names <- names(graph_data)

  # as.matrix() coerces the column before plotting (e.g. a factor becomes
  # character); kept so existing figures are unchanged.
  column_values <- function(col_idx) {
    as.matrix(graph_data[, col_idx])[, 1]
  }

  pl_graph <- plot_ly(
    x = x,
    y = column_values(1),
    type = "scatter",
    mode = "markers",
    name = trace_names[1]
  )
  # seq_len(n_cols)[-1] is empty for a single column, so one code path covers
  # both the one-column and the multi-column case.
  for (i in seq_len(n_cols)[-1]) {
    pl_graph <- add_trace(
      pl_graph,
      x = x,
      y = column_values(i),
      type = "scatter",
      mode = "markers",
      name = trace_names[i]
    )
  }
  pl_graph
}

# Plot each feature's attention score against the validation row index, one
# marker trace per feature.
View_graph(df_att_score)

모델 예측

# Model output for the validation features (still on the normalized scale).
prediction <- predict(simple_att_model, x_val, batch_size = batch.size)
## 4/4 - 0s - 58ms/epoch - 14ms/step

결과값의 역 정규화

# Undo the min-max scaling of Hardness. The forward transform was
#   scaled = (raw - min) / (max - min)
# with the training minimum and maximum, so the inverse is
#   raw = scaled * (max - min) + min.
# (Adding min before multiplying, as in (scaled + min) * (max - min), does
# not recover the original values.)
max_value <- max(hardness_data_set_tr$Hardness)
min_value <- min(hardness_data_set_tr$Hardness)
y_val <- y_val * (max_value - min_value) + min_value
prediction <- prediction * (max_value - min_value) + min_value

histogram 그래프 확인

# Overlaid, semi-transparent histograms of the observed and predicted values.
fig_hist <- plot_ly(alpha = 0.6)
fig_hist <- add_histogram(fig_hist, x = y_val[, 1], name = "y")
fig_hist <- add_histogram(fig_hist, x = prediction[, 1], name = "prediction")
fig_hist <- layout(fig_hist, barmode = "overlay")
fig_hist

실제값과 예측값의 점 그래프 확인

# Marker plot of observed and predicted values against the validation row
# index.
sample_index <- 1:nrow(y_val)
observed_fig <- plot_ly(
  x = sample_index,
  y = y_val,
  type = "scatter",
  mode = "markers",
  name = "y"
)
add_trace(
  observed_fig,
  x = sample_index,
  y = prediction,
  type = "scatter",
  mode = "markers",
  name = "prediction"
)